* Reset the session and configure Stata for an unattended run.
clear all

* The Gini loop in Panel A calls the user-written command -ineqdeco-.
* Install it from SSC only when it is not already on the ado-path: a machine
* that has the package then needs no network access, and a failed install
* stops the run here instead of surfacing later as an unrecognized command.
capture which ineqdeco
if _rc ssc install ineqdeco

set more off
set maxvar 10000


* Load the "gini" sheet of the WID summary-statistics workbook (first row
* holds the variable names) and park it in a tempfile. Later sections merge
* on year and plot the variables gini and p10p50 from this file.
import excel "$SupplementaryData/wid_summarystats_1913to2021.xlsx", sheet("gini") firstrow

    tempfile actual
    save `actual'
  
**------------------------**
**------------------------**

****************************
* PANEL A
****************************
* Gini coefficient of fathers' income by decade (1910-1970) in the baseline
* sample, plotted against the gini series held in the tempfile actual.

    use "$Mydirectory1/3_Output/2_PooledData_analysis.dta", clear
    keep if baseline_sample==1

    * Placeholders for the decade-level Gini of each income measure.
    generate gini_dad_baseline = .
    generate gini_dad_byocc = .

    * Convert both income measures from logs back to levels, in place
    * (the variables keep their log_ names).
    foreach inc of varlist log_father_baseline log_father_byocc_interp {
        replace `inc' = exp(`inc')
    }

    * Weighted Gini per decade, written onto every row of that decade.
    forvalues dec = 1910(10)1970 {
        quietly ineqdeco log_father_baseline if decade==`dec' [aw=wgt_sex_race]
        replace gini_dad_baseline = `r(gini)' if decade==`dec'

        quietly ineqdeco log_father_byocc_interp if decade==`dec' [aw=wgt_sex_race]
        replace gini_dad_byocc = `r(gini)' if decade==`dec'
    }

    * Reduce to one row per decade; the Gini values are constant within a
    * decade, so which row survives does not matter.
    keep decade gini_dad_baseline gini_dad_byocc
    bysort decade: keep if _n==1
    rename decade year

    tempfile dad_gini
    save `dad_gini'

    * Attach the decade-level values to the yearly series.
    use `actual', clear
    merge 1:1 year using `dad_gini'
    sort year

    * Two y-axes: parental income (axis 1) against the gini series (axis 2).
    twoway ///
        (connected gini_dad_baseline year if year<=1980, lpattern(solid) lwidth(0.5) msymbol(circle) mcolor(blue*0.75) lcolor(blue*0.75) yaxis(1)) ///
        (connected gini year if year<=1980, lpattern(dash) lwidth(0.5) msymbol(circle) mcolor(gray*0.75) lcolor(gray*0.75) yaxis(2)), ///
        ytitle("Gini coefficient (parental income)" " ", axis(1)) ///
        legend(on ring(0) rows(2) position(8) order(1 "Parental income" 2 "National income")) ///
        ytitle("Gini coefficient (national income)" " ", axis(2)) ///
        xtitle(" " "Year") xlabel(1910(10)1980)
    graph export "$Mydirectory2/appendix_a/gini_comparison_baseline.pdf", as(pdf) replace
        
**------------------------**
**------------------------**

************
* PANEL B
************
* Decade totals of fathers' income for the top 10 (rank >= 90) and the
* bottom 50 (rank <= 50), plotted against the p10p50 series held in the
* tempfile actual.

    use "$Mydirectory1/3_Output/2_PooledData_analysis.dta", clear
    keep if baseline_sample==1

    * The decade variable takes over the name year so the decade-level totals
    * can be merged onto the yearly series by year.
    drop year
    rename decade year

    * Convert income from logs back to levels, in place (the variable keeps
    * its log_ name).
    replace log_father_baseline = exp(log_father_baseline)

    * Calculate all income per decade; then in top 10 and in bottom 50.
    * NOTE(review): each collapse below runs on a different subsample with
    * aweights; check in the collapse documentation how aweights are rescaled
    * for (sum) and confirm the three totals are on the intended scale.
    preserve
        collapse (sum) log_father_baseline [aw=wgt_sex_race], by(year)

        tempfile allinc
        save `allinc'
    restore

    preserve
        * Stata orders missing values above every number, so a bare >=90
        * would also keep rows whose rank is missing. Exclude them explicitly.
        keep if rank_father_baseline>=90 & !missing(rank_father_baseline)

        collapse (sum) log_father_baseline [aw=wgt_sex_race], by(year)
        rename log_father_baseline top10

        tempfile inc10
        save `inc10'
    restore

    preserve
        * <=50 already excludes missing ranks (missing compares as larger).
        keep if rank_father_baseline<=50

        collapse (sum) log_father_baseline [aw=wgt_sex_race], by(year)
        rename log_father_baseline bottom50

        tempfile inc50
        save `inc50'
    restore

    * Attach the three decade-level totals to the yearly series.
    use `actual', clear
    merge 1:1 year using `allinc', nogen
    merge 1:1 year using `inc10', nogen
    merge 1:1 year using `inc50', nogen

    * NOTE(review): the plotted series is top10/bottom50 scaled by 100, while
    * the axis title calls it a ratio -- confirm the scaling is intended.
    gen share_top10 = (top10 / bottom50)*100

    * Figure
    #delimit ;
    twoway 
           (connect share_top10 year if year<=1980, lpat(solid) lwidth(0.5) msymbol(circle) mc(blue*0.75) lc(blue*0.75) yaxis(1))
           (connect p10p50 year if year<=1980, lpat(dash) lwidth(0.5) msymbol(circle) mc(gray*0.75) lc(gray*0.75) yaxis(2)),       
    ylabel(15(5)35, axis(1)) yti("Top10/Bottom50 ratio (parental income)" " ", axis(1)) legend(on ring(0) rows(2) pos(8) order(1 "Parental income" 2 "National income")) 
    ylabel(8(2)18, axis(2)) yti("Top10/Bottom50 ratio (national income)" " ", axis(2))
    xtitle(" " "Year") xlabel(1910(10)1980);    
    #delimit cr 
    graph export "$Mydirectory2/appendix_a/p10p50_comparison_baseline.pdf", as(pdf) replace   

**------------------------**
**------------------------**

************
* PANEL C
************

* Read the csv taken from Margo's Figure 1, reduce it to one observation per
* year, and save it
    import delimited "$SupplementaryData/Margo_Census.csv", clear

    * year is v1 rounded to the nearest integer; income is the mean of v2
    * over all rows that round to the same year.
    generate year = round(v1)
    bysort year: egen income = mean(v2)
    drop v1 v2

    * dup is 0 when a year occurs once, otherwise the row number within the
    * year; keeping dup<=1 therefore leaves exactly one row per year.
    quietly bysort year: generate dup = cond(_N==1, 0, _n)
    keep if dup<=1

    tempfile gaps
    save `gaps'

* Build an empty yearly frame (200 rows, 1860 onward) and attach the series;
* the merge asserts that every year in the series falls inside the frame
    clear all

    set obs 200

    generate year = 1859 + _n

    merge 1:1 year using `gaps', assert(1 3) nogen

    tempfile gaps2
    save `gaps2'

* Bring in Jácome et al. data, calculate and save gaps
* For each decade 1910-1970 the ratio is the weighted mean income of race==2
* divided by the weighted mean income of race==1 (presumably Black over
* White, given the axis title below -- confirm the coding of race).
    use "$Mydirectory1/3_Output/2_PooledData_analysis.dta", clear
    keep if baseline_sample==1

    gen ratio = .
    gen levels = exp(log_father_personalinc_interp)

    forvalues dec = 1910(10)1970 {
        * meanonly computes just the mean; reading r(mean) as an expression
        * yields missing (not a syntax error) when a cell has no observations.
        quietly summarize levels if decade==`dec' & race==1 [aw=wgt_sex_race], meanonly
        local mean_r1 = r(mean)

        quietly summarize levels if decade==`dec' & race==2 [aw=wgt_sex_race], meanonly
        local mean_r2 = r(mean)

        replace ratio = `mean_r2' / `mean_r1' if decade==`dec'
    }

    * The ratio is constant within a decade, so keep one row per decade.
    bysort decade: keep if _n==1
    keep decade ratio
    rename decade year

    tempfile ourratio
    save `ourratio'

* Merge data
    use `gaps2', clear
    merge 1:1 year using `ourratio', assert(1 3) nogen

* Higgs estimates from Margo's Appendix B
    gen higgs = .
    replace higgs = 0.24 if year==1870
    replace higgs = 0.35 if year==1900
    replace higgs = 0.34 if year==1940

* Margo estimates from Margo's Appendix B
* (these overwrite any value of income already present for those years)
    replace income = 0.28 if year == 1870
    replace income = 0.32 if year == 1900
    replace income = 0.38 if year == 1940

    keep if year>=1870 & year<2020

    * The income series is drawn twice: up to 1940 under the legend label
    * "Margo" and after 1940 under the label "Census".
    #delimit ;
        twoway (connect income year if year<=1940, lcolor(purple) mcolor(purple) msymbol(circle)) 
        (connect higgs year, lcolor(lavender) mcolor(lavender) msymbol(circle_hollow)) 
        (connect income year if year>1940, lcolor(teal) mcolor(teal) msymbol(none) lwidth(0.25)) 
        (connect ratio year, lcolor(blue) mcolor(blue) msymbol(square)),
        legend(on order(1 "Margo" 2 "Higgs" 3 "Census" 4 "Baseline"))
        ytitle("Black-white income gap" " ", axis(1))
        ylabel(0.2(.1).7, axis(1)) 
        xtitle(" " "Year") xlabel(1870(20)2020);
    #delimit cr
    graph export "$Mydirectory2/appendix_a/income_gap_historical_baseline.pdf", as(pdf) replace
    